rm(list=ls())
gc()

## 06 - ESTIMATE TSMART PID MODELS


## LOAD PACKAGES
library(data.table)
library(lfe)
library(stringr)

data = fread('panel-analysis-2016-2020.csv.gz',select = c('DemDiff','RepDiff',"DemSpExpWhite_nohh_year1" ,   "DemSpExpWhiteDiff_nohh"  ,    "DemSpExpBlack_nohh_year1" ,   "DemSpExpBlackDiff_nohh"     ,
                                                                           "DemSpExpHispanic_nohh_year1", "DemSpExpHispanicDiff_nohh"  , "DemSpExpAsian_nohh_year1"  ,  "DemSpExpAsianDiff_nohh"  ,    "RepSpExpWhite_nohh_year1" ,  
                                                                            "RepSpExpWhiteDiff_nohh"  ,    "RepSpExpBlack_nohh_year1"   , "RepSpExpBlackDiff_nohh"   ,   "RepSpExpHispanic_nohh_year1", "RepSpExpHispanicDiff_nohh",  
                                                                            "RepSpExpAsian_nohh_year1" ,   "RepSpExpAsianDiff_nohh",'Age_year1','Race','Gender',
                                                                            'ZipCode','countyfips','Party_year1','hh.n.adj_year1','hh.n.adj_year2',
                                                                            'hh.d.adj_year1','hh.d.adj_year2','hh.r.adj_year1','hh.r.adj_year2',
                                                                            'Married_year1','State', 'WhiteBlockGroupDiff','MarriageDiff','AgeBlockGroupDiff','RegsBlockGroupDiff',
                                                                              'HHIncomeBlockGroupDiff' , 'CollegeBlockGroupDiff' , 'HomeownerBlockGroupDiff' , 'YearBuiltBlockGroupDiff' , 
                                                                              'DriveWorkBlockGroupDiff' , 'EmplBlockGroupDiff' , 'HouseValueBlockGroupDiff'))
  
data = data[Race%in%c('White','Black','Hispanic','Asian')]


  data[!Party_year1%in%c('Democrat','Republican'), Party_year1:='Other']
  data[countyfips=='',countyfips:=NA]
  data[ZipCode=='',ZipCode:=NA]
  data[Gender=='',Gender:=NA]
  # create household change variables
data[,hh.n.diff:=hh.n.adj_year2-hh.n.adj_year1]
data[,hh.d.diff:=hh.d.adj_year2-hh.d.adj_year1]
data[,hh.r.diff:=hh.r.adj_year2-hh.r.adj_year1]




for(r in c('Asian', 'Black','Hispanic','White')){
  data[['DemTreatment']] = data[[paste0('DemSpExp', r,'_nohh_year1')]]
  data[['RepTreatment']] = data[[paste0('RepSpExp', r,'_nohh_year1')]]
  data[['DemSpExpDiff_nohh']] = data[[paste0('DemSpExp', r,'Diff_nohh')]]
  data[['RepSpExpDiff_nohh']] = data[[paste0('RepSpExp', r,'Diff_nohh')]]
  
  
  data[, Interaction:=ifelse(Race==r, r, paste0('Non-',r))]
  
  
  # split data
  dems = data[Party_year1=='Democrat']
  reps = data[Party_year1=='Republican']
  oths = data[Party_year1=='Other']
  
  #rm(data)
  gc()
  
  # construct grouping variables based on age decile, gender, race, year1 treatment decile, and city
  # we can drop files where any of these are missing
  
  
  dems = dems[!is.na(Age_year1) & !is.na(Gender) & !is.na(Race) & !is.na(Married_year1) &!is.na(ZipCode)]
  reps = reps[!is.na(Age_year1) & !is.na(Gender) & !is.na(Race) & !is.na(Married_year1) & !is.na(ZipCode)]
  oths = oths[!is.na(Age_year1) & !is.na(Gender) & !is.na(Race) & !is.na(Married_year1) & !is.na(ZipCode)]
  
  dems[,AgeDecile:=cut(Age_year1,breaks=unique(as.numeric(quantile(Age_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  reps[,AgeDecile:=cut(Age_year1,breaks=unique(as.numeric(quantile(Age_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  oths[,AgeDecile:=cut(Age_year1,breaks=unique(as.numeric(quantile(Age_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  

  # spatial seg treatment
  # democrat exposure
  dems[,DemSpDecile:=cut(DemTreatment,breaks=unique(as.numeric(quantile(DemTreatment, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  reps[,DemSpDecile:=cut(DemTreatment,breaks=unique(as.numeric(quantile(DemTreatment, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  oths[,DemSpDecile:=cut(DemTreatment,breaks=unique(as.numeric(quantile(DemTreatment, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  
  # republican exposure
  dems[,RepSpDecile:=cut(RepTreatment,breaks=unique(as.numeric(quantile(RepTreatment, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  reps[,RepSpDecile:=cut(RepTreatment,breaks=unique(as.numeric(quantile(RepTreatment, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  oths[,RepSpDecile:=cut(RepTreatment,breaks=unique(as.numeric(quantile(RepTreatment, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  
  
  
  #####
  # make groups
  # spatial seg treatment
  dems[,GroupSpDemExp:=paste(ZipCode,AgeDecile,Race, Gender, DemSpDecile,State,hh.n.adj_year1,hh.d.adj_year1,Married_year1, sep ='_')]
  reps[,GroupSpDemExp:=paste(ZipCode,AgeDecile,Race, Gender, DemSpDecile,State,hh.n.adj_year1,hh.d.adj_year1,Married_year1, sep ='_')]
  oths[,GroupSpDemExp:=paste(ZipCode,AgeDecile,Race, Gender, DemSpDecile,State,hh.n.adj_year1,hh.d.adj_year1,Married_year1, sep ='_')]
  
dems[,GroupSpRepExp:=paste(ZipCode,AgeDecile,Race, Gender, RepSpDecile,State,hh.n.adj_year1,hh.r.adj_year1,Married_year1, sep ='_')]
reps[,GroupSpRepExp:=paste(ZipCode,AgeDecile,Race, Gender, RepSpDecile,State,hh.n.adj_year1,hh.r.adj_year1,Married_year1, sep ='_')]
oths[,GroupSpRepExp:=paste(ZipCode,AgeDecile,Race, Gender, RepSpDecile,State,hh.n.adj_year1,hh.r.adj_year1,Married_year1, sep ='_')]

  
  
#   ###
  # estimate models
  
  ModelDemSpExpDems = summary(felm(DemDiff ~ Interaction:(DemSpExpDiff_nohh + hh.d.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                     HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                     DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff)|GroupSpDemExp|0|countyfips, data = dems[!grepl('_NA_',GroupSpDemExp)&!is.na(countyfips)]), robust =T)
  ModelDemSpExpReps = summary(felm(DemDiff ~ Interaction:(DemSpExpDiff_nohh+ hh.d.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                     HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                     DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff)|GroupSpDemExp|0|countyfips, data = reps[!grepl('_NA_',GroupSpDemExp)&!is.na(countyfips)]), robust =T)
  ModelDemSpExpOths = summary(felm(DemDiff ~ Interaction:(DemSpExpDiff_nohh+ hh.d.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                     HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                     DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff)|GroupSpDemExp|0|countyfips, data = oths[!grepl('_NA_',GroupSpDemExp)&!is.na(countyfips)]), robust =T)
  
  ModelRepSpExpDems = summary(felm(RepDiff ~ Interaction:(RepSpExpDiff_nohh+ hh.r.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                     HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                     DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff)|GroupSpRepExp|0|countyfips, data = dems[!grepl('_NA_',GroupSpRepExp)&!is.na(countyfips)]), robust =T)
  ModelRepSpExpReps = summary(felm(RepDiff ~ Interaction:(RepSpExpDiff_nohh+ hh.r.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                     HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                     DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff)|GroupSpRepExp|0|countyfips, data = reps[!grepl('_NA_',GroupSpRepExp)&!is.na(countyfips)]), robust =T)
  ModelRepSpExpOths = summary(felm(RepDiff ~ Interaction:(RepSpExpDiff_nohh+ hh.r.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                     HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                     DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff)|GroupSpRepExp|0|countyfips, data = oths[!grepl('_NA_',GroupSpRepExp)&!is.na(countyfips)]), robust =T)
  
  
  
  save(ModelDemSpExpDems, ModelDemSpExpReps, ModelDemSpExpOths, 
       ModelRepSpExpDems, ModelRepSpExpReps, ModelRepSpExpOths,
       
       
       file = paste0('results/current-results-2016-2020-',r,'-subset.Rdata'))
}
